Newer
Older
spamdb / Scripts / Extract messages.applescript
nstanger on 4 Dec 2002 6 KB Initial import of spam DB stuff.
property WorkingDirectory : "Waiareka HD:Users:nstanger:tmp:"
property WorkingPOSIX : POSIX path of "Waiareka HD:Users:nstanger:tmp:"

global c, cQ, Qc, QcQ

on run
	set c to ", "
	set cQ to ", '"
	set Qc to "', "
	set QcQ to "', '"
	
	
	try
		set msgFile to open for access file (WorkingDirectory & "message.sql") with write permission
	on error e
		my doError(e)
		return
	end try
	
	tell application "Microsoft Entourage" to Â
		set theMessages to get current messages
	
	repeat with thisMsg in theMessages
		
		-- Most of the headers we can just grab from the standard message properties.
		tell application "Microsoft Entourage"
			set theSubject to change "'" into "''" in ((subject of thisMsg) as string)
			set theSender to change "'" into "''" in ((address of sender of thisMsg) as string)
			set thePriority to (priority of thisMsg) as string
			set theHeaders to change "'" into "''" in ((headers of thisMsg) as string)
			set theContent to change "'" into "''" in ((content of thisMsg) as text)
			set theSource to change "'" into "''" in ((source of thisMsg) as string)
			set theCharacterSet to character set of thisMsg
			set theHTML to has html of thisMsg
		end tell -- application "Microsoft Entourage"
		
		-- Grab the headers as a list so we can extract individual headers.
		set headerList to my parseHeaders(paragraphs of theHeaders)
		
		-- We can't just use the time sent property because if the date is weird, Entourage
		-- invariably mangles into something else. We therefore manually extract the sent date
		-- from the headers; at least this way we have a fighting chance of getting something
		-- resembling the actual message date (as long as it's not _too_ misguided).
		set theTimeSent to my extractSentDate(headerList)
		
		set insertMessage to "insert into message (subject, sender, time_sent, " & Â
			"priority, headers, content, source, character_set, has_html) " & Â
			"values ('" & theSubject & QcQ & theSender & QcQ & theTimeSent & QcQ & Â
			thePriority & QcQ & theHeaders & QcQ & theContent & QcQ & theSource & Â
			QcQ & theCharacterSet & Qc & theHTML & ");"
		
		try
			set eof msgFile to 0
			write insertMessage to msgFile
		on error e
			my doError(e)
			close access msgFile
			return
		end try
		
		do shell script "cat " & WorkingPOSIX & "message.sql | tr '\\r' '\\n' > " & WorkingPOSIX & Â
			"message_lf.sql; rm " & WorkingPOSIX & "message.sql"
		
		set theResult to (do shell script "psql -d spam -f " & WorkingPOSIX & "message_lf.sql")
		
		if (theResult ­ "") then
			set theMessageID to my retrieveMessageID(theResult)
			my processRecipients(thisMsg, theMessageID)
			my processHeaders(headerList, theMessageID)
		else
			beep
		end if
		
	end repeat -- with thisMsg in theMessages
	
	close access msgFile
	
end run


-- Get the recipients of a message. Return as a list in the form
-- {{"blah@foo.com", "to"}, "bar@quux.net", "cc"}
on getRecipients(thisMsg)
	local recipientList
	
	set recipientList to {}
	
	tell application "Microsoft Entourage"
		repeat with thisRecipient in recipients of thisMsg
			set recipientList to recipientList & Â
				{{address of address of thisRecipient, first word of (recipient type of thisRecipient as string)}}
		end repeat
	end tell
	
	return recipientList
end getRecipients


-- Build and return a list of message headers in the format
-- {{"header name", "header value"}, {"header name", "header value"}, {...}}.
on parseHeaders(headerList)
	local parsedHeaders, headerName, headerValue
	local thisHeader, oldDelim
	
	set parsedHeaders to {}
	set headerName to ""
	set headerValue to ""
	
	set oldDelim to AppleScript's text item delimiters
	set AppleScript's text item delimiters to {":"}
	
	-- Skip the last item of headerList because it's the blank line terminating the headers.
	repeat with thisHeader in items 1 thru -2 of headerList
		
		-- If a line starts with white space, the header has been folded, so add this line to
		-- the current header instead of creating a new header.
		if (character 1 of thisHeader is in {" ", "	"}) then
			set headerValue to headerValue & return & "  " & contents of thisHeader
			
		else -- It's a new header.
			
			-- If we're already working on a header, store it before starting the new one.
			if (headerName ­ "") then Â
				set parsedHeaders to parsedHeaders & {{headerName, headerValue}}
			
			-- The header name is the text before the first colon (text item 1).
			set headerName to text item 1 of (contents of thisHeader)
			
			-- Everything else is the header value, but we need to strip off the first space;
			-- hence text 2 thru -1.
			set headerValue to text 2 thru -1 of ((text items 2 thru -1 of (contents of thisHeader)) as string)
			
		end if -- character 1 of thisHeader is in {" ", "	"}
		
	end repeat -- with thisHeader in items 1 thru -2 of headerList
	
	set AppleScript's text item delimiters to oldDelim
	
	-- Add the last header to the list before we finish.
	set parsedHeaders to parsedHeaders & {{headerName, headerValue}}
	
	return parsedHeaders
	
end parseHeaders


-- Grabs the date header from the header list. Returns "" if it isn't found.
on extractSentDate(headerList)
	repeat with thisHeader in headerList
		if (item 1 of thisHeader = "Date") then return item 2 of thisHeader
	end repeat -- with thisHeader in headerList
	return ""
end extractSentDate


-- Retrieve the message ID for a message given the result string from inserting it
-- (which contains the PostgreSQL OID of the row).
on retrieveMessageID(theOIDString)
	
	set theOID to second word of theOIDString
	
	-- Use the OID to retrieve the message ID of the message row we just inserted.
	return (do shell script "psql -d spam -c \"select message_id from message where oid = " & Â
		theOID & ";\" -P tuples_only -P format=unaligned")
	
end retrieveMessageID


-- Process and store the recipients of a message.
on processRecipients(thisMsg, theMessageID)
	tell application "Microsoft Entourage"
		repeat with thisRecipient in recipients of thisMsg
			set recipientAddress to address of address of thisRecipient
			set recipientName to change "'" into "''" in (display name of address of thisRecipient as string)
			set recipientType to first word of (recipient type of thisRecipient as string)
			do shell script "psql -d spam -c \"insert into recipient (address, recipient_name, recipient_type, message_id) " & Â
				"values ('" & recipientAddress & QcQ & recipientName & QcQ & recipientType & Qc & theMessageID & ");\""
		end repeat
	end tell
end processRecipients


-- Process and store the headers of a message.
on processHeaders(headerList, theMessageID)
	repeat with thisHeader in headerList
		set theResult to do shell script "psql -d spam -c \"insert into header (header_name, header_value, message_id) " & Â
			"values ('" & item 1 of thisHeader & QcQ & item 2 of thisHeader & Qc & theMessageID & ");\""
		tell me to display dialog theResult
	end repeat
end processHeaders


-- Error handler.
on doError(e)
	beep
	display dialog e
end doError